Temporal overview
# Yearly poem counts per collection, overlaid with a 10 year rolling mean.
# Both series are stacked with a `measure` column so that a single ggplot call
# can draw the counts as points and the rolling mean as a line.
p_year %>%
  inner_join(poems, by = c("p_id")) %>%
  count(collection, year) %>%
  mutate(measure = "yearly count") %>%
  union_all(
    p_year %>% # 10 year rolling mean
      distinct(year) %>%
      # Pair each observed year (LHS) with the observed years (RHS) inside its
      # window. The window is year-5 .. year+4, i.e. exactly 10 calendar
      # years, so it agrees with the divisor and the legend label below. The
      # earlier upper bound of year+5 covered 11 years while still dividing
      # by 10, which inflated the mean.
      left_join(
        p_year %>% distinct(year),
        sql_on = "RHS.year BETWEEN LHS.year-5 AND LHS.year+4"
      ) %>%
      inner_join(p_year, by = c("year.y" = "year")) %>%
      inner_join(poems, by = c("p_id")) %>%
      group_by(collection = collection, year = year.x) %>%
      # Dividing by the fixed window width (not by the number of years that
      # actually have poems) makes years without poems count as zero.
      summarize(n = n() / 10, .groups = "drop") %>%
      mutate(measure = "10 year rolling mean")
  ) %>%
  # Drop the years 0 and 9999 (and anything outside them) as well as the
  # "literary" collection before plotting.
  filter(year > 0, year < 9999, collection != "literary") %>%
  ggplot(aes(x = year, y = n, color = measure)) +
  # Each geom only receives the rows of its own measure.
  geom_point(data = ~ .x %>% filter(measure == "yearly count")) +
  geom_line(data = ~ .x %>% filter(measure == "10 year rolling mean")) +
  theme_hsci_discrete(base_family = "Arial") +
  # Legend inside the panel, anchored at the top-left corner.
  theme(
    legend.justification = c(0, 1),
    legend.position = c(0.02, 0.98),
    legend.background = element_blank(),
    legend.key = element_blank()
  ) +
  labs(color = NULL) +
  scale_y_continuous(
    breaks = seq(0, 20000, by = 2000),
    labels = scales::comma_format()
  ) +
  ylab("Poems") +
  scale_x_continuous(breaks = seq(1000, 2000, by = 50)) +
  xlab("Year") +
  facet_wrap(~collection, ncol = 1) +
  ggtitle("Number of poems by year and collection")

# Table of poems whose year is one of the two values 0 and 9999, counted per
# collection and year.
p_year %>%
  filter(year %in% c(0, 9999)) %>%
  left_join(poems) %>%
  count(collection, year) %>%
  ungroup() %>%
  gt() %>%
  tab_header(title = "Abnormal years") %>%
  fmt_integer(columns = n)
| collection | year | n |
|---|---|---|
| skvr | 9999 | 469 |
| erab | 0 | 5,443 |
Overview of collectors
# One horizontal bar chart per collection showing how many poems each
# collector contributed. The 100 largest collectors (weighted by poem count)
# keep their own bar; the remainder is lumped into "Other". Returns a list of
# ggplot objects, one per collection.
poems %>%
  distinct(collection) %>%
  pull() %>%
  map(function(coll_name) {
    p_col %>%
      inner_join(
        poems %>% filter(collection == !!coll_name),
        by = c("p_id")
      ) %>%
      count(col_id) %>%
      left_join(collectors, by = c("col_id")) %>%
      select(col_id, name, n) %>%
      collect() %>%
      # Label each bar "id|name" and order the bars by poem count.
      mutate(col_id = fct_reorder(str_c(col_id, "|", name), n)) %>%
      mutate(col_id = fct_lump_n(col_id, n = 100, w = n)) %>%
      # Move "Other" to the front of the levels, but only when lumping
      # actually produced it: a collection with 100 or fewer collectors has
      # no "Other" level, and naming it unconditionally raised
      # "1 unknown level in `f`: Other".
      mutate(
        col_id = fct_relevel(col_id, function(lvls) intersect("Other", lvls))
      ) %>%
      group_by(col_id) %>%
      tally(wt = n) %>% {
        ggplot(., aes(x = col_id, y = n)) +
          geom_col() +
          geom_text(aes(label = p(n)), hjust = 'left', nudge_y = 100) +
          theme_hsci_discrete(base_family = "Arial") +
          coord_flip() +
          labs(title = str_c("Collectors in ", coll_name))
      }
  })
## Warning: 1 unknown level in `f`: Other
## [[1]]

##
## [[2]]

##
## [[3]]

##
## [[4]]

# Table of collector ids that occur in p_col but have no matching row in
# collectors, with the number of p_col rows for each.
p_col %>%
  anti_join(collectors) %>%
  count(col_id) %>%
  gt() %>%
  tab_header(title = "Collectors without a name") %>%
  fmt_integer(columns = n)
Geographical overview
# Poem counts per location, fetched locally as (name, n).
d <- collect(
  p_loc %>%
    count(loc_id) %>%
    inner_join(locations) %>%
    select(name, n)
)

# Number of poems that have no row in p_loc at all.
poems_without_location <- pull(count(anti_join(poems, p_loc)))

# Locations that have no matching polygon, plus one extra row (name NA)
# carrying the poems that have no location.
unprojected_locations <- add_row(
  anti_join(d, polygons),
  name = NA,
  n = poems_without_location
)
# Choropleth of poem counts per polygon. The title reports how many poems
# could not be drawn (the total of unprojected_locations).
tm_shape(left_join(polygons, d)) +
  tm_polygons(col = "n", id = "name", style = "fisher", palette = "plasma") +
  tm_layout(
    title = str_c(
      "Geographical overview. Missing ",
      p(pull(tally(unprojected_locations, wt = n))),
      " poems."
    )
  )

Poem locations not mapped
# Table of the locations that could not be mapped, largest first.
unprojected_locations %>%
  arrange(desc(n)) %>%
  gt() %>%
  tab_header(title = "Poem locations not mapped") %>%
  fmt_integer(columns = n)
| name |
n |
| Narvusi |
4,982 |
| NA |
4,326 |
| Uusikirkko Vpl |
1,934 |
| välismaa |
1,822 |
| Vuole |
1,679 |
| Pyhäjärvi Vpl |
1,000 |
| Tartu |
973 |
| Viena |
871 |
| Peräpohjola |
842 |
| Pohjois-Pohjanmaa |
809 |
| Itä- ja Pohjois-Inkeri |
799 |
| Etelä-Karjala |
770 |
| Tallinn |
685 |
| Viljandi l. |
602 |
| Pärnu l. |
598 |
| Hämeenlinna |
565 |
| Keski-Inkeri |
532 |
| Viljandimaa |
480 |
| Tulomajärvi |
468 |
| Uusikaupunki |
457 |
| Võrumaa |
452 |
| Kiimaisjärvi |
448 |
| Viron Inkeri |
433 |
| Länsi-Inkeri |
426 |
| Narva l. |
422 |
| Sortavala mlk |
419 |
| Etelä-Savo |
412 |
| Tveri |
404 |
| Länsipohja |
402 |
| Viipuri mlk |
400 |
| Keminmaa |
375 |
| Pieksämäki |
346 |
| Läänemaa |
329 |
| Pyhäjärvi Ol |
324 |
| Rakvere l. |
304 |
| Savonlinna |
254 |
| Pyhäjärvi Ul |
245 |
| Pohjois-Karjala |
240 |
| Säräisniemi |
233 |
| Salo |
209 |
| Pohjois-Savo |
208 |
| Saaremaa |
205 |
| Lahti |
189 |
| Etelä-Pohjanmaa |
181 |
| Uusikirkko Tl |
177 |
| Tornio |
172 |
| Mikkeli mlk |
161 |
| Valga |
150 |
| Koski Hl |
142 |
| Porvoo mlk |
134 |
| Kainuu |
119 |
| Heinola mlk |
117 |
| Kuusankoski |
115 |
| Mänttä |
113 |
| Laatokan Karjala (Raja-Karjala) |
107 |
| Salo |
107 |
| Riihimäki |
104 |
| Helsingin pit |
103 |
| Pärnumaa |
102 |
| Tuutari=Tuuteri |
100 |
| Kotka |
96 |
| Satakunta |
95 |
| Alajärvi |
91 |
| Ruija |
91 |
| Paide l. |
88 |
| Lapväärtti-Lappfjärd |
87 |
| Haapsalu |
86 |
| Kuopio mlk |
77 |
| Toijala |
70 |
| Jyväskylä mlk |
69 |
| Häme |
67 |
| Lappeenranta |
63 |
| Varkaus |
62 |
| Kajaani mlk |
60 |
| Võru l. |
60 |
| Valgamaa |
58 |
| Karjala Tl |
51 |
| Pohjois-Pirkkala |
47 |
| Kovero |
47 |
| Uusimaa |
46 |
| Sõrve |
40 |
| Järvamaa |
39 |
| Virumaa |
36 |
| Novgorodin alue |
34 |
| Tartumaa |
29 |
| Varsinais-Suomi |
26 |
| Hiiumaa |
23 |
| Vaasa |
22 |
| Revonlahti-Revolax |
22 |
| Storfjord |
20 |
| Parainen |
18 |
| Lappi Tl |
17 |
| Vaala |
17 |
| Harjumaa |
17 |
| Karkkila |
16 |
| Kouvola |
16 |
| Lieksa |
16 |
| Rovaniemi mlk |
16 |
| Prunkkala |
15 |
| Kimito |
15 |
| Kerava |
15 |
| Valkeakoski |
15 |
| Kuusisto |
13 |
| Uusikaupunki mlk |
13 |
| Iisalmi mlk |
13 |
| Kokkola-Gamlakarleby |
13 |
| Muoslompolo |
13 |
| Hongonjoki |
12 |
| Mustasaari-Korsholm |
12 |
| Kemiö |
11 |
| Pieksämäki mlk |
11 |
| Pielisensuu |
11 |
| Alakiiminki |
11 |
| Särkisalo |
10 |
| Hamina |
10 |
| Kristiinankaupunki-Kristinestad |
10 |
| Siipyy-Sideby |
10 |
| Nauvo |
9 |
| Taipale (Enontaipale) |
9 |
| Junosuando |
9 |
| Karesuando |
9 |
| Kenjärvi |
9 |
| Jämsänkoski |
8 |
| Kuhmoniemi |
8 |
| Kuolajärvi |
8 |
| Äänislinna |
8 |
| Sulva-Solf-Solv |
7 |
| Muurmanni |
7 |
| Siuntio |
6 |
| Suolahti |
6 |
| Taipale |
6 |
| Uzmana |
6 |
| Suma |
6 |
| Vammala |
5 |
| Rauma mlk |
5 |
| Kistrand |
5 |
| Moseija |
5 |
| Kirkkonummi |
4 |
| Imatra |
4 |
| Uusikaarlepyy-Nykarleby |
4 |
| Yliveteli |
4 |
| Lauritsala |
3 |
| Ruukki |
3 |
| Koutokeino |
3 |
| Riipuskala |
3 |
| Kuressaare l. |
3 |
| Järvenpää |
2 |
| Pietarsaari-Jakobstad |
2 |
| Jepua-Jeppo |
2 |
| Kouta |
2 |
| Tiudia |
2 |
| Jaama |
2 |
| Ikaalinen mlk |
1 |
| Anjalankoski |
1 |
| Myllykoski |
1 |
| Loviisa |
1 |
| Sipoo |
1 |
| Keski-Suomi |
1 |
| Nurmes mlk |
1 |
| Petolahti-Petalax |
1 |
| Raippaluoto-Replot |
1 |
| Oulu mlk |
1 |
| Nordkapp |
1 |
| Maasöy |
1 |
| Muodoslompolo |
1 |
| Pietari=Leningrad |
1 |
| Siestarjoki |
1 |
| Ahvenanmaa |
1 |
Geographical overview by collection
# Poem counts per collection and location, fetched locally as
# (collection, name, n).
d <- collect(
  p_loc %>%
    left_join(poems) %>%
    count(collection, loc_id) %>%
    ungroup() %>%
    inner_join(locations) %>%
    select(collection, name, n)
)

# Per-collection number of poems with no row in p_loc, given a missing name
# so the rows can be stacked under the location counts.
poems_without_location <- mutate(
  collect(count(anti_join(poems, p_loc), collection)),
  name = NA_character_
)

# Locations without a matching polygon, plus the poems without any location.
unprojected_locations <- union_all(
  anti_join(d, polygons),
  poems_without_location
)
# One choropleth per collection. Each title reports how many of that
# collection's poems could not be drawn. Returns a list of tmap objects.
poems %>%
  distinct(collection) %>%
  pull() %>%
  map(function(coll_name) {
    # Poem counts per location name for this collection only.
    located_counts <- p_loc %>%
      inner_join(
        poems %>% filter(collection == !!coll_name),
        by = c("p_id")
      ) %>%
      count(loc_id) %>%
      inner_join(locations) %>%
      select(name, n) %>%
      collect()
    # Formatted total of this collection's unmapped poems.
    missing_poems <- unprojected_locations %>%
      filter(collection == coll_name) %>%
      tally(wt = n) %>%
      pull() %>%
      p()
    tm_shape(left_join(polygons, located_counts)) +
      tm_polygons(col = "n", id = "name", style = "fisher", palette = "plasma") +
      tm_layout(
        title = str_c("Geography of ", coll_name, ". Missing ", missing_poems, " poems.")
      )
  })
## [[1]]

##
## [[2]]

##
## [[3]]

##
## [[4]]

Poem locations not mapped by collection
# One table per collection listing its unmapped locations, largest first.
# Returns a list of gt tables.
poems %>%
  distinct(collection) %>%
  pull() %>%
  map(function(coll_name) {
    unprojected_locations %>%
      filter(collection == coll_name) %>%
      arrange(desc(n)) %>%
      select(-collection) %>%
      gt() %>%
      tab_header(title = str_c("Poem locations not mapped in ", coll_name)) %>%
      fmt_integer(columns = n)
  })
[[1]]
|
name
|
n
|
|
Narvusi
|
3,202
|
|
Vuole
|
1,524
|
|
Uusikirkko Vpl
|
1,131
|
|
Pyhäjärvi Vpl
|
785
|
|
Etelä-Karjala
|
658
|
|
Pohjois-Pohjanmaa
|
557
|
|
Länsipohja
|
396
|
|
Kiimaisjärvi
|
363
|
|
Etelä-Savo
|
350
|
|
Tveri
|
294
|
|
Viipuri mlk
|
277
|
|
Salo
|
198
|
|
Pieksämäki
|
183
|
|
Pyhäjärvi Ol
|
160
|
|
Koski Hl
|
134
|
|
Pohjois-Karjala
|
129
|
|
Uusikirkko Tl
|
116
|
|
Tulomajärvi
|
115
|
|
Tornio
|
113
|
|
Porvoo mlk
|
112
|
|
Pyhäjärvi Ul
|
102
|
|
Pohjois-Savo
|
98
|
|
Viena
|
98
|
|
Satakunta
|
83
|
|
Säräisniemi
|
74
|
|
Uusikaupunki
|
71
|
|
Heinola mlk
|
62
|
|
Häme
|
61
|
|
Ruija
|
60
|
|
Jyväskylä mlk
|
52
|
|
Keski-Inkeri
|
49
|
|
Hämeenlinna
|
48
|
|
Kovero
|
47
|
|
Tuutari=Tuuteri
|
45
|
|
Laatokan Karjala (Raja-Karjala)
|
44
|
|
Itä- ja Pohjois-Inkeri
|
42
|
|
Kainuu
|
38
|
|
Kuopio mlk
|
35
|
|
Peräpohjola
|
33
|
|
Varkaus
|
30
|
|
Kajaani mlk
|
30
|
|
Etelä-Pohjanmaa
|
27
|
|
Keminmaa
|
26
|
|
Varsinais-Suomi
|
24
|
|
Mänttä
|
22
|
|
Novgorodin alue
|
22
|
|
Kotka
|
20
|
|
Mikkeli mlk
|
17
|
|
Karjala Tl
|
16
|
|
Alajärvi
|
16
|
|
Kimito
|
15
|
|
Prunkkala
|
14
|
|
Savonlinna
|
14
|
|
Sortavala mlk
|
13
|
|
Revonlahti-Revolax
|
12
|
|
Alakiiminki
|
11
|
|
Taipale (Enontaipale)
|
9
|
|
Helsingin pit
|
7
|
|
Sulva-Solf-Solv
|
7
|
|
Siuntio
|
6
|
|
Mustasaari-Korsholm
|
6
|
|
Siipyy-Sideby
|
6
|
|
Uzmana
|
6
|
|
Suma
|
6
|
|
Lapväärtti-Lappfjärd
|
5
|
|
Vaasa
|
5
|
|
Lappeenranta
|
4
|
|
Kenjärvi
|
4
|
|
Vammala
|
3
|
|
Kirkkonummi
|
3
|
|
Kokkola-Gamlakarleby
|
3
|
|
Kuolajärvi
|
3
|
|
Uusimaa
|
2
|
|
Iisalmi mlk
|
2
|
|
Pietarsaari-Jakobstad
|
2
|
|
Jepua-Jeppo
|
2
|
|
Kouta
|
2
|
|
Länsi-Inkeri
|
2
|
|
Uusikaupunki mlk
|
1
|
|
Anjalankoski
|
1
|
|
Keski-Suomi
|
1
|
|
Lieksa
|
1
|
|
Raippaluoto-Replot
|
1
|
|
Kuhmoniemi
|
1
|
[[2]]
|
name
|
n
|
|
NA
|
2,008
|
|
välismaa
|
1,822
|
|
Tartu
|
973
|
|
Tallinn
|
685
|
|
Viljandi l.
|
602
|
|
Pärnu l.
|
598
|
|
Viljandimaa
|
480
|
|
Võrumaa
|
452
|
|
Narva l.
|
422
|
|
Läänemaa
|
329
|
|
Rakvere l.
|
304
|
|
Saaremaa
|
205
|
|
Valga
|
150
|
|
Pärnumaa
|
102
|
|
Paide l.
|
88
|
|
Haapsalu
|
86
|
|
Võru l.
|
60
|
|
Valgamaa
|
58
|
|
Sõrve
|
40
|
|
Järvamaa
|
39
|
|
Virumaa
|
36
|
|
Tartumaa
|
29
|
|
Hiiumaa
|
23
|
|
Harjumaa
|
17
|
|
Kuressaare l.
|
3
|
[[3]]
|
name
|
n
|
|
Narvusi
|
1,780
|
|
NA
|
1,591
|
|
Peräpohjola
|
809
|
|
Uusikirkko Vpl
|
803
|
|
Viena
|
773
|
|
Itä- ja Pohjois-Inkeri
|
757
|
|
Hämeenlinna
|
517
|
|
Keski-Inkeri
|
483
|
|
Viron Inkeri
|
433
|
|
Länsi-Inkeri
|
424
|
|
Sortavala mlk
|
406
|
|
Uusikaupunki
|
386
|
|
Tulomajärvi
|
353
|
|
Keminmaa
|
349
|
|
Pohjois-Pohjanmaa
|
252
|
|
Savonlinna
|
240
|
|
Pyhäjärvi Vpl
|
215
|
|
Lahti
|
189
|
|
Pyhäjärvi Ol
|
164
|
|
Pieksämäki
|
163
|
|
Säräisniemi
|
159
|
|
Vuole
|
155
|
|
Etelä-Pohjanmaa
|
154
|
|
Mikkeli mlk
|
144
|
|
Pyhäjärvi Ul
|
143
|
|
Viipuri mlk
|
123
|
|
Kuusankoski
|
115
|
|
Etelä-Karjala
|
112
|
|
Pohjois-Karjala
|
111
|
|
Pohjois-Savo
|
110
|
|
Tveri
|
110
|
|
Salo
|
107
|
|
Riihimäki
|
104
|
|
Helsingin pit
|
96
|
|
Mänttä
|
91
|
|
Kiimaisjärvi
|
85
|
|
Lapväärtti-Lappfjärd
|
82
|
|
Kainuu
|
81
|
|
Kotka
|
76
|
|
Alajärvi
|
75
|
|
Toijala
|
70
|
|
Laatokan Karjala (Raja-Karjala)
|
63
|
|
Etelä-Savo
|
62
|
|
Uusikirkko Tl
|
61
|
|
Lappeenranta
|
59
|
|
Tornio
|
59
|
|
Heinola mlk
|
55
|
|
Tuutari=Tuuteri
|
55
|
|
Pohjois-Pirkkala
|
47
|
|
Uusimaa
|
44
|
|
Kuopio mlk
|
42
|
|
Karjala Tl
|
35
|
|
Varkaus
|
32
|
|
Ruija
|
31
|
|
Kajaani mlk
|
30
|
|
Porvoo mlk
|
22
|
|
Storfjord
|
20
|
|
Parainen
|
18
|
|
Lappi Tl
|
17
|
|
Jyväskylä mlk
|
17
|
|
Vaasa
|
17
|
|
Vaala
|
17
|
|
Karkkila
|
16
|
|
Kouvola
|
16
|
|
Rovaniemi mlk
|
16
|
|
Kerava
|
15
|
|
Valkeakoski
|
15
|
|
Lieksa
|
15
|
|
Kuusisto
|
13
|
|
Muoslompolo
|
13
|
|
Uusikaupunki mlk
|
12
|
|
Hongonjoki
|
12
|
|
Satakunta
|
12
|
|
Novgorodin alue
|
12
|
|
Kemiö
|
11
|
|
Salo
|
11
|
|
Pieksämäki mlk
|
11
|
|
Iisalmi mlk
|
11
|
|
Pielisensuu
|
11
|
|
Särkisalo
|
10
|
|
Hamina
|
10
|
|
Kokkola-Gamlakarleby
|
10
|
|
Kristiinankaupunki-Kristinestad
|
10
|
|
Revonlahti-Revolax
|
10
|
|
Nauvo
|
9
|
|
Junosuando
|
9
|
|
Karesuando
|
9
|
|
Jämsänkoski
|
8
|
|
Koski Hl
|
8
|
|
Äänislinna
|
8
|
|
Kuhmoniemi
|
7
|
|
Muurmanni
|
7
|
|
Häme
|
6
|
|
Suolahti
|
6
|
|
Taipale
|
6
|
|
Mustasaari-Korsholm
|
6
|
|
Länsipohja
|
6
|
|
Rauma mlk
|
5
|
|
Kuolajärvi
|
5
|
|
Kistrand
|
5
|
|
Moseija
|
5
|
|
Kenjärvi
|
5
|
|
Imatra
|
4
|
|
Uusikaarlepyy-Nykarleby
|
4
|
|
Siipyy-Sideby
|
4
|
|
Yliveteli
|
4
|
|
Lauritsala
|
3
|
|
Ruukki
|
3
|
|
Koutokeino
|
3
|
|
Riipuskala
|
3
|
|
Varsinais-Suomi
|
2
|
|
Vammala
|
2
|
|
Järvenpää
|
2
|
|
Tiudia
|
2
|
|
Jaama
|
2
|
|
Prunkkala
|
1
|
|
Ikaalinen mlk
|
1
|
|
Myllykoski
|
1
|
|
Kirkkonummi
|
1
|
|
Loviisa
|
1
|
|
Sipoo
|
1
|
|
Nurmes mlk
|
1
|
|
Petolahti-Petalax
|
1
|
|
Oulu mlk
|
1
|
|
Nordkapp
|
1
|
|
Maasöy
|
1
|
|
Muodoslompolo
|
1
|
|
Pietari=Leningrad
|
1
|
|
Siestarjoki
|
1
|
|
Ahvenanmaa
|
1
|
[[4]]
Spatiotemporal overview
# Poem counts per time slice and location.
d <- poems %>%
  # Treat the years 0 and 9999 as missing before slicing.
  left_join(
    p_year %>% mutate(year = if_else(year %in% c(0L, 9999L), NA, year))
  ) %>%
  collect() %>%
  # Split the years into 11 ntile buckets and label every bucket with the
  # range of years it contains.
  mutate(year_ntile = ntile(year, 11)) %>%
  group_by(year_ntile) %>%
  mutate(years = str_c(min(year), "-", max(year))) %>%
  ungroup() %>%
  left_join(collect(p_loc)) %>%
  count(years, loc_id) %>%
  ungroup() %>%
  left_join(collect(select(locations, loc_id, name)))

# Small-multiple maps, one facet per time slice. complete() adds a row for
# every name/years combination that has no count.
tm_shape(left_join(polygons, complete(d, name, years))) +
  tm_polygons(col = "n", id = "name", style = "fisher", palette = "plasma") +
  tm_layout(
    main.title = "Geographical overviews by time",
    legend.outside.size = 0.1
  ) +
  tm_facets(by = "years", ncol = 4)

Poem verse statistics
Line types
# Number of verses per collection and line type, largest type first within
# each collection.
d <- collect(
  verses %>%
    left_join(verse_poem) %>%
    left_join(poems) %>%
    count(collection, type) %>%
    ungroup() %>%
    arrange(collection, desc(n))
)

# The table is handed to gt() while still grouped by collection; the
# proportion is each type's share within its own collection.
d %>%
  group_by(collection) %>%
  mutate(proportion = n / sum(n)) %>%
  gt() %>%
  fmt_integer(columns = n) %>%
  fmt_percent(columns = proportion)
| type | n | proportion |
|---|---|---|
| **skvr** | | |
| V | 1,340,987 | 94.63% |
| L | 44,303 | 3.13% |
| CPT | 27,869 | 1.97% |
| K | 3,931 | 0.28% |
| **erab** | | |
| V | 1,861,583 | 93.39% |
| PAG | 53,040 | 2.66% |
| CPT | 19,844 | 1.00% |
| L | 18,465 | 0.93% |
| TYH | 18,357 | 0.92% |
| REF | 17,869 | 0.90% |
| LRY | 3,868 | 0.19% |
| RRE | 307 | 0.02% |
| MRK | 52 | 0.00% |
| U | 38 | 0.00% |
| LLI | 2 | 0.00% |
| TYP | 1 | 0.00% |
| **jr** | | |
| V | 812,343 | 90.94% |
| L | 49,411 | 5.53% |
| CPT | 28,030 | 3.14% |
| K | 3,502 | 0.39% |
| **literary** | | |
| V | 82,460 | 97.54% |
| L | 1,220 | 1.44% |
| CPT | 777 | 0.92% |
| K | 87 | 0.10% |
Verse line lengths
# Number of verses per collection and verse length in characters.
d_nr_characters <- collect(
  verses_cl %>%
    mutate(nr_characters = str_length(text)) %>%
    left_join(verse_poem) %>%
    left_join(poems) %>%
    count(collection, nr_characters) %>%
    ungroup() %>%
    arrange(collection, desc(n))
)
# Number of verses per collection and verse length in words. A verse's word
# count is taken to be the highest word position recorded for it in word_occ.
d_nr_words <- word_occ %>%
  group_by(v_id) %>%
  # SQL aggregates always drop missing values; passing na.rm = TRUE states
  # that explicitly and silences the dbplyr warning without changing the
  # result.
  summarise(nr_words = max(pos, na.rm = TRUE), .groups = "drop") %>%
  left_join(verse_poem) %>%
  left_join(poems) %>%
  count(collection, nr_words) %>%
  ungroup() %>%
  arrange(collection, desc(n)) %>%
  collect()
## Warning: Missing values are always removed in SQL aggregation functions.
## Use `na.rm = TRUE` to silence this warning
## This warning is displayed once every 8 hours.
Verse line lengths in characters
# Bar chart of verse-line lengths in characters, one panel per collection
# with its own y scale. Restricted to lines of at most 60 characters.
ggplot(
  d_nr_characters %>% filter(nr_characters <= 60),
  aes(x = nr_characters, y = n)
) +
  geom_col(width = 1) +
  facet_wrap(~collection, scales = "free_y") +
  theme_hsci_discrete(base_family = "Arial") +
  scale_y_continuous(labels = scales::comma_format()) +
  labs(
    x = "Number of characters",
    y = "Verses",
    title = "Number of characters in verse lines"
  )

# Ridgeline version of the character-length distribution: one ridge per
# collection whose height is the share of that collection's verses at each
# length. The share is computed over all lengths before the plot is
# restricted to lines of at most 60 characters.
d_nr_characters %>%
  group_by(collection) %>%
  mutate(prop = n / sum(n)) %>%
  ungroup() %>%
  filter(nr_characters <= 60) %>%
  ggplot(
    aes(x = nr_characters, y = collection, fill = collection, height = prop)
  ) +
  geom_density_ridges(stat = "identity") +
  theme_hsci_discrete(base_family = "Arial") +
  # scale_y_continuous(labels=scales::percent_format()) +
  labs(
    x = "Number of characters",
    y = "Verses",
    title = "Number of characters in verse lines"
  )

Verse lines with more than 60 characters
# Per collection: how many verse lines exceed 60 characters, and what share
# of all of that collection's lines they make up.
d_nr_characters %>%
  # nl holds the row's verse count when the length is over 60, otherwise 0.
  mutate(nl = if_else(nr_characters > 60, n, 0L)) %>%
  group_by(collection) %>%
  summarise(
    lines = sum(nl),
    proportion = sum(nl) / sum(n),
    .groups = "drop"
  ) %>%
  arrange(desc(lines)) %>%
  gt() %>%
  tab_header(title = "Verse lines with more than 60 characters") %>%
  fmt_integer(columns = lines) %>%
  fmt_percent(columns = proportion)
| collection | lines | proportion |
|---|---|---|
| jr | 1,911 | 0.24% |
| erab | 291 | 0.02% |
| skvr | 202 | 0.02% |
| literary | 1 | 0.00% |
Verse line lengths in words
# Bar chart of verse-line lengths in words, one panel per collection with its
# own y scale. Restricted to lines of at most 10 words.
ggplot(
  d_nr_words %>% filter(nr_words <= 10),
  aes(x = nr_words, y = n)
) +
  geom_col(width = 1) +
  facet_wrap(~collection, scales = "free_y") +
  scale_x_continuous(breaks = seq(0, 10, by = 2)) +
  scale_y_continuous(labels = scales::comma_format()) +
  theme_hsci_discrete(base_family = "Arial") +
  labs(
    x = "Number of words",
    y = "Verses",
    title = "Number of words in verse lines"
  )

# Binned ridgeline of verse-line lengths in words, one ridge per collection.
# uncount() expands the counts back to one row per verse so that
# stat_binline() can do the binning itself.
d_nr_words %>%
  filter(nr_words <= 10) %>%
  uncount(n) %>%
  ggplot(aes(x = nr_words, y = collection, fill = collection)) +
  stat_binline(binwidth = 1) +
  theme_hsci_discrete(base_family = "Arial") +
  scale_x_continuous(breaks = seq(0, 10, by = 2)) +
  # scale_y_continuous(labels=scales::percent_format()) +
  labs(
    x = "Number of words",
    y = "Verses",
    title = "Number of words in verse lines"
  )

Verse lines with more than 10 words
# Per collection: how many verse lines have more than 10 words, and what
# share of all of that collection's lines they make up.
d_nr_words %>%
  # nl holds the row's verse count when the length is over 10, otherwise 0.
  mutate(nl = if_else(nr_words > 10, n, 0L)) %>%
  group_by(collection) %>%
  summarise(
    lines = sum(nl),
    proportion = sum(nl) / sum(n),
    .groups = "drop"
  ) %>%
  arrange(desc(lines)) %>%
  gt() %>%
  tab_header(title = "Verse lines with more than 10 words") %>%
  fmt_integer(columns = lines) %>%
  fmt_percent(columns = proportion)
| collection | lines | proportion |
|---|---|---|
| jr | 839 | 0.11% |
| erab | 257 | 0.01% |
| skvr | 38 | 0.00% |
| literary | 9 | 0.01% |
# Word count per verse, taken as the highest word position recorded for the
# verse in word_occ, stored through compute_a() with an index on both columns.
verse_nr_words <- word_occ %>%
  group_by(v_id) %>%
  # SQL aggregates always drop missing values; na.rm = TRUE states that
  # explicitly and avoids the dbplyr warning without changing the result.
  summarise(nr_words = max(pos, na.rm = TRUE)) %>%
  compute_a(unique_indexes = list(c("v_id", "nr_words")))
# Character length of every word, stored through compute_a() with an index
# on both columns.
word_nr_characters <- compute_a(
  words %>%
    mutate(nr_characters = str_length(text)) %>%
    select(w_id, nr_characters),
  unique_indexes = list(c("w_id", "nr_characters"))
)
# Number of word occurrences per collection, verse length in words, word
# position and word length in characters.
d <- collect(
  word_occ %>%
    left_join(word_nr_characters) %>%
    left_join(verse_nr_words) %>%
    # verse_poem's own pos column is dropped so that it cannot take part in
    # the join or clash with the word position.
    left_join(verse_poem %>% select(-pos), by = c("v_id")) %>%
    left_join(poems) %>%
    count(collection, nr_words, pos, nr_characters)
)
# Distribution of word lengths (in characters) by the word's position in the
# verse, for verses of 2 to 5 words: one row of panels per collection, one
# column per position, one ridge per verse length.
#
# The earlier per-group `prop` column was dropped: nothing downstream read
# it, because stat_binline() bins the uncounted rows itself.
d %>%
  filter(nr_words >= 2, nr_words < 6) %>%
  mutate(nr_words = as_factor(nr_words), pos = as_factor(pos)) %>%
  # Expand the counts back to one row per word occurrence for binning.
  uncount(n) %>%
  ggplot(aes(x = nr_characters, y = nr_words, fill = nr_words)) +
  stat_binline(binwidth = 1) +
  facet_grid(collection ~ pos, labeller = labeller(pos = label_both)) +
  xlab("Number of characters in word") +
  ylab("Number of words in verse") +
  labs(
    title = "Number of characters in words by their position",
    subtitle = "According to length of verse and collection"
  ) +
  theme_hsci_discrete(base_family = "Arial")
